# sim_bounds.R
# Yimeng Li
# This script contains a function that generates simulated list experiment data and computes bound estimates
#   for different types of list experiments considered in "Relaxing the No Liars Assumption in List Experiment Analyses".
# Dependencies: function list_relaxed_liars
# This version: Dec 20, 2018.

# function sim_bounds
# Inputs:
# (1) probvec: vector of marginal probabilities for items (the last item is the sensitive item)
# (2) corrmat: matrix of correlations between items
# (3) J: number of control items
# (4) affirmative: TRUE if an affirmative latent response to the sensitve item is considered sensitive 
# (5) Nsim: total number of simulations
# (6) Ntotal: total number of respondents
# (7) seed: seed number to generate simulated list experiment data
# (8) liardist: distribution of liars conditional on # of control items answered affirmatively: no, constant, nonconstant
#       no liars: no liars regardless of # of control items answered affirmatively
#       constant liars: 30% liars regardless of # of control items answered affirmatively
#       nonconstant liars: 30% liars if answering affirmative to all control items, decaying at rate 0.8
#                       (e.g., 30% x 0.8 = 24% liars if answering affirmatively to all but one control item)
# (9) listtype: the type of list experiment data generated, serving as legends in graphs
# Outputs: A dataframe containing for each simulation
# (1) listtype
# (2) liardist
# (3) the identified lower bound
# (4) the identified upper bound

# Set working directory:
# setwd("")
source("list_relaxed_liars.R")

library(bindata)
library(dplyr)

sim_bounds <- function(probvec, corrmat, J, affirmative, Nsim = 2000, Ntotal = 2000, seed, liardist, listtype = NA){
  set.seed(seed)
  # Generate latent response to each item for Ntotal respondents for each of Nsim simulations:
  simData <- as.data.frame(rmvbin(Nsim*Ntotal, margprob=probvec, bincorr=corrmat, colnames=c("C1","C2","C3","C4","S")))
  # Potential outcomes under control and treatment:
  simData <- simData %>%
    mutate(y_c = C1 + C2 + C3 + C4,
           y_t = y_c + case_when(
             liardist == "no liars" ~ S,
             affirmative == TRUE & liardist == "constant liars" ~ (runif(Nsim*Ntotal) > 0.3)*S,
             affirmative == TRUE & liardist == "nonconstant liars" ~ (runif(Nsim*Ntotal) > 0.3*0.8^{4-y_c})*S,
             affirmative == FALSE & liardist == "constant liars" ~ S + (runif(Nsim*Ntotal) < 0.3)*(S == 0),
             affirmative == FALSE & liardist == "nonconstant liars" ~ S + (runif(Nsim*Ntotal) < 0.3*0.8^y_c)*(S == 0),
             TRUE ~ NA_real_))
  # Initialize vectors to store lower and upper bound estimates for simulations
  lowerbounds <- vector("double", Nsim)
  upperbounds <- vector("double", Nsim)
  # Compute lower and upper bounds for each simulation:
  for (i in 0:(Nsim-1)) {
    simData.c <- simData[(Ntotal*i+1):(Ntotal*i+Ntotal/2),]
    simData.t <- simData[(Ntotal*i+Ntotal/2+1):(Ntotal*i+Ntotal),]
    
    dist_control <- as.vector(table(simData.c$y_c))
    dist_treated <- as.vector(table(simData.t$y_t))
    
    if (length(dist_control) == J+1 & length(dist_treated) == J+2) {
      # Some estimated proportions may be smaller than 0 or larger than 1 in some simulations even though the population proportions are between 0 and 1.
      bounds_simData <- list_relaxed_liars(dist_control, dist_treated, J, affirmative, combine = FALSE, CI = FALSE, warning = FALSE)
      lowerbounds[i+1] <- bounds_simData$bounds[1]
      upperbounds[i+1] <- bounds_simData$bounds[2]
    } else {
      lowerbounds[i+1] <- NA
      upperbounds[i+1] <- NA
    }
  }
  # Output:
  return(data.frame("listtype" = listtype, "liardist" = liardist, "lower" = lowerbounds, "upper" = upperbounds))
}
